//first import all the sdf files and generate the inchi -> hmdb code connections

/**
 * this script fills the hmdb database
 */

import edu.ucdavis.genomics.metabolomics.binbase.connector.references.hmdb.HMDBContentResolver
import edu.ucdavis.genomics.metabolomics.binbase.connector.references.hmdb.HMDBSDFParser
import org.apache.log4j.Logger
import java.util.concurrent.ThreadPoolExecutor.AbortPolicy

// Load the path configuration of the application.
// File.toURL() is deprecated because it does not escape characters which are
// illegal in URLs, so the file is converted to a URI first and then to a URL.
def config = new ConfigSlurper().parse(new File('grails-app/conf/PathConfiguration.groovy').toURI().toURL())

// logger used by all top-level statements of this script
Logger logger = Logger.getLogger("ImportHMDB")

logger.info("starting import/update of the hmdb data")


// directory whose files are listed by the first import loop below
String tempDir = config.outputDirectory + File.separator + "hmdb-sdf"

logger.info "using dir: ${tempDir}"
/**
 * parses all the files in the hmdb-sdf directory
 */

new File(tempDir).listFiles().each {File hmdb ->

  // files for which FileHelper.fileAccessible reports a false value are skipped without a message
  if (!FileHelper.fileAccessible(hmdb)) {
    return
  }

  // only *.txt files are parsed, anything else is just reported on debug level
  if (!hmdb.getName().endsWith(".txt")) {
    logger.debug("invalid file: ${hmdb}")
    return
  }

  try {
    logger.info "reading file: ${hmdb}"

    HMDBSDFParser resolver = new HMDBSDFParser()
    resolver.prepare(hmdb)

    logger.debug "checking if inchi is acceptable"

    // a missing or blank inchi => nothing to work with, so the file is ignored
    if (resolver.getInchi() == null || resolver.getInchi().trim().size() == 0) {
      logger.warn "ignoring this file: ${hmdb}"
      return
    }

    // fetch the compound for this inchi / inchi key via the helper
    Compound compound = CompoundHelper.getCompound(resolver.getInchi(), resolver.getInchiKey(), logger)

    // hand the existing hmdb entries of the compound and the parsed hmdb id to the helper
    CompoundHelper.updateHMDB(HMDB.findAllByCompound(compound), logger, compound, resolver.getHmdbId())

    // save our compound
    compound = CompoundHelper.saveCompound(compound, logger)

    CompoundHelper.aquireStatistic()

    // this file was imported and should be skipped from the next import
    FileHelper.markFileAsImported(hmdb, logger)
  }
  catch (edu.ucdavis.genomics.metabolomics.binbase.connector.references.exception.InvalidInchiException e) {
    // an invalid inchi is expected from time to time, a warning is enough
    logger.warn "ignoring this file: ${hmdb}"
  }
  catch (Exception e) {
    // any other failure is logged and the loop continues with the next file
    logger.error(e.getMessage(), e)
  }
}


// the first pass over the sdf files is finished at this point
println("done with import of hmdb sdf files")

// second pass: import all the hmdb content files
println("starting with import of hmdb files")

// the second loop lists the files of the 'hmdb' directory below the output directory
tempDir = config.outputDirectory + File.separator + "hmdb"

/**
 * parses all the files in the hmdb directory
 */
new File(tempDir).listFiles().each {File hmdb ->

  // only existing, regular *.txt files are processed, everything else is skipped silently
  if (!hmdb.isFile() || !hmdb.exists() || !hmdb.getName().endsWith(".txt")) {
    return
  }

  try {
    logger.info "reading file: ${hmdb}"

    HMDBContentResolver resolver = new HMDBContentResolver()
    resolver.prepare(hmdb)

    // without a hmdb id the content can't be assigned to anything
    if (resolver.getHmdbID() == null) {
      logger.warn "ignoring this file: ${hmdb}"
      return
    }

    HMDB hmdbComp = HMDB.findByHmdbId(resolver.hmdbID)

    // no HMDB entry is stored for this id => nothing to update
    if (hmdbComp == null) {
      logger.info "no hmdb found for this hmdb id ${resolver.hmdbID}, ignoring it"
      return
    }

    Compound compound = Compound.findByInchi(hmdbComp.compound.inchi)
    logger.info "found compound ${compound.id}"

    defineFormula(compound, logger, resolver)

    CompoundHelper.updatesTheCasNumber(resolver.getCasNumber(), compound, logger)

    // pubchem compound id, the value is parsed once and used for the query and the update
    if (resolver.getPubchemCompoundId() != null && !resolver.getPubchemCompoundId().equals("Not Available")) {
      int cid = Integer.parseInt(resolver.getPubchemCompoundId())
      CompoundHelper.updateCID(PubchemCompound.executeQuery("select a from PubchemCompound a where a.cid = ?", [cid]), logger, compound, cid)
    }

    // iupac name
    if (resolver.getIUPAC() && !resolver.getIUPAC().equals("Not Available")) {
      List<String> iupacNames = new Vector<String>()
      iupacNames << resolver.getIUPAC()

      CompoundHelper.updateIUPACNames(iupacNames, compound, logger)
    }

    // chebi ids
    if (resolver.getCheBiId()) {
      List<String> chebiIds = resolver.getCheBiId()

      for (String chebi: chebiIds) {
        if (chebi.equals("Not Available")) {
          continue
        }

        logger.debug "working on chebi id ${chebi}"
        CompoundHelper.updateChebiId(Chebi.executeQuery("select a from Chebi a where a.chebiId = ?", [chebi]), logger, compound, chebi)
      }
    }

    // metagene ids
    if (resolver.getMetageneId()) {
      List<String> metageneIds = resolver.getMetageneId()

      // ugly hack: these characters are simply dropped from the id, one after
      // the other and in exactly this order; a better solution is still needed
      List<String> droppedCharacters = [
              "\uFB02", "\u0152", "\u2013", "\u2014", "\u2019", "\u201c",
              "\u201d", "\u2022", "\u2026", "\u2030", "\u2039", "\u2122"
      ]

      for (String val: metageneIds) {
        if (val.equals("Not Available")) {
          continue
        }

        for (String dropped: droppedCharacters) {
          val = val.replaceAll(dropped, "")
        }

        // values without a ':' are not processed at all
        if (!val.contains(":")) {
          continue
        }

        // find out if we got more than one metagene id
        if (val.contains(",") && val.count(":") > 1) {
          for (String c: val.split(",")) {
            logger.debug "working on metagene id ${c}"
            CompoundHelper.updateMetageneId(Metagene.executeQuery("select a from Metagene a where a.metageneId = ?", [c]), logger, compound, c)
          }
        }
        else {
          logger.debug "working on metagene id ${val}"
          CompoundHelper.updateMetageneId(Metagene.executeQuery("select a from Metagene a where a.metageneId = ?", [val]), logger, compound, val)
        }
      }
    }

    // metlin id
    if (resolver.getMetlinId() && !resolver.getMetlinId().equals("Not Available")) {
      logger.debug "working on metlin id ${resolver.getMetlinId()}"
      CompoundHelper.updateMetlinId(Metlin.executeQuery("select a from Metlin a where a.metlinId = ?", [resolver.getMetlinId()]), logger, compound, resolver.getMetlinId())
    }

    // description, stored on the hmdb entry and not on the compound
    if (resolver.getDescription() && !resolver.getDescription().equals("Not Available")) {
      hmdbComp.description = resolver.getDescription()
    }

    // the name is handed to the helper as a synonym of the compound
    if (resolver.getName() && !resolver.getName().equals("Not Available")) {
      List<String> synonyms = new Vector<String>()
      synonyms << resolver.getName()

      CompoundHelper.addSynonym(synonyms, logger, compound)
    }

    compound = CompoundHelper.saveCompound(compound, logger)

    CompoundHelper.aquireStatistic()

    // this file was imported and should be skipped from the next import
    FileHelper.markFileAsImported(hmdb, logger)
  }
  catch (edu.ucdavis.genomics.metabolomics.binbase.connector.references.exception.InvalidInchiException e) {
    logger.warn "ignoring this file: ${hmdb}"
  }
  catch (Exception e) {
    // any other failure is logged and the loop continues with the next file
    logger.error(e.getMessage(), e)
  }
}

/**
 * Compares the formula of the compound with the chemical formula of the resolver.
 *
 * If the compound has no formula yet and the resolver provides one, it is set on
 * the compound. If both have a formula, nothing is changed; a warning is logged
 * when the two differ. Every other case is only logged.
 *
 * @param compound the compound whose formula is checked and possibly set
 * @param logger the logger which receives all messages
 * @param resolver the resolver which provides the chemical formula
 */
private def defineFormula(Compound compound, Logger logger, HMDBContentResolver resolver) {
  // the compound has no formula yet => take the one of the resolver, if there is one
  if (compound.getFormula() == null) {
    if (resolver.getChemicalFormula() == null) {
      logger.info "resolver has no formula => skip"
    }
    else {
      logger.info "defining new formula: ${resolver.getChemicalFormula()}"
      compound.setFormula(resolver.getChemicalFormula())
    }
    return
  }

  logger.info "compound already has formula"

  if (resolver.getChemicalFormula() == null) {
    logger.info "resolver has no formula => skip"
    return
  }

  logger.info "resolver also has formula"

  // both sides have a formula, the stored one is never overwritten
  if (resolver.getChemicalFormula().equals(compound.getFormula())) {
    logger.info "both have the some formula so they should be the same"
  }
  else {
    logger.warn "possible error in database, different molecular formula"
  }
}
